/*
* Terrier - Terabyte Retriever
* Webpage: http://terrier.org
* Contact: terrier{a.}dcs.gla.ac.uk
* University of Glasgow - School of Computing Science
* http://www.gla.ac.uk
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is TestSplitEmittedTerm.java.
*
* The Original Code is Copyright (C) 2004-2011 the University of Glasgow.
* All Rights Reserved.
*
* Contributor(s):
* Richard McCreadie <richardm{a.}dcs.gla.ac.uk> (original author)
* Craig Macdonald <craigm{a.}dcs.gla.ac.uk>
*/
package org.terrier.structures.indexing.singlepass.hadoop;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import junit.framework.TestCase;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.terrier.structures.indexing.singlepass.hadoop.SplitEmittedTerm.SETPartitioner;
import org.terrier.structures.indexing.singlepass.hadoop.SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm;
import org.terrier.structures.indexing.singlepass.hadoop.SplitEmittedTerm.SETRawComparatorTerm;
import org.terrier.structures.indexing.singlepass.hadoop.SplitEmittedTerm.SETRawComparatorTermSplitFlush;
/**
 * Tests for SplitEmittedTerm, including the Comparators and Partitioners.
 * <p>
 * The expectations encoded below are:
 * <ul>
 * <li>SplitEmittedTerm round-trips through its Writable serialisation;</li>
 * <li>SETRawComparatorTerm only distinguishes entries whose terms differ;</li>
 * <li>SETRawComparatorTermSplitFlush and SplitEmittedTerm.compareTo also
 * distinguish entries by split number and flush number;</li>
 * <li>both raw comparators give the same answer on objects and on their
 * serialised byte form.</li>
 * </ul>
 */
@SuppressWarnings("deprecation")
public class TestSplitEmittedTerm extends TestCase {

    /** Checks that the constructor, the getters and the setters agree with each other. */
    public void testMethods() throws Exception
    {
        SplitEmittedTerm t1 = new SplitEmittedTerm("t1", 10, 34);
        assertEquals("t1", t1.getTerm());
        assertEquals(10, t1.getSplitno());
        assertEquals(34, t1.getFlushno());

        // changing the flush number must leave the split number untouched
        t1.setFlushno(11);
        assertEquals(10, t1.getSplitno());
        assertEquals(11, t1.getFlushno());

        // changing the split number must leave the flush number untouched
        t1.setSplitno(5);
        assertEquals(5, t1.getSplitno());
        assertEquals(11, t1.getFlushno());

        t1.setTerm("t2");
        assertEquals("t2", t1.getTerm());
    }

    /**
     * Serialises a SplitEmittedTerm, reads it back into a fresh instance and
     * checks that nothing was lost on the way.
     * @param t the term
     * @param split the split number
     * @param flush the flush number
     */
    private void checkWritable(final String t, final int split, final int flush) throws Exception
    {
        SplitEmittedTerm t1 = new SplitEmittedTerm(t, split, flush);
        byte[] b = toBytes(t1);
        SplitEmittedTerm t2 = new SplitEmittedTerm();
        t2.readFields(new DataInputStream(new ByteArrayInputStream(b)));
        assertTrue("Original should equal deserialised copy", t1.equals(t2));
        assertTrue("Deserialised copy should equal original", t2.equals(t1));
        assertEquals(t, t2.getTerm());
        assertEquals(split, t2.getSplitno());
        assertEquals(flush, t2.getFlushno());
    }

    /** Round-trips small values and the largest possible split/flush numbers. */
    public void testWritable() throws Exception
    {
        checkWritable("t1", 10, 34);
        checkWritable("t1", Integer.MAX_VALUE, Integer.MAX_VALUE);
    }

    /**
     * Returns the bytes that the given Writable emits from write().
     * @param w the object to serialise
     * @return the serialised form of w
     */
    private byte[] toBytes(Writable w) throws Exception
    {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(baos);
        try
        {
            w.write(dos);
            // make sure everything has reached baos before it is read
            dos.flush();
        }
        finally
        {
            dos.close();
        }
        return baos.toByteArray();
    }

    /**
     * Checks that an entry is equal to itself and to a separately constructed
     * entry with the same term, split and flush, according to equals(),
     * compareTo() and both raw comparators (on objects and on bytes).
     * @param t the term
     * @param split the split number
     * @param flush the flush number
     */
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(
        value="DM_STRING_CTOR",
        justification="Check String.equals is used, not ==")
    private void checkEqualityTerm(String t, int split, int flush) throws Exception
    {
        SplitEmittedTerm t1 = new SplitEmittedTerm(t, split, flush);
        SETRawComparatorTerm compare = new SETRawComparatorTerm();
        SETRawComparatorTermSplitFlush compare2 = new SETRawComparatorTermSplitFlush();

        // reflexivity
        assertEquals(0, t1.compareTo(t1));
        assertTrue(t1.equals(t1));
        assertEquals(0, compare.compare(t1, t1));
        assertEquals(0, compare2.compare(t1, t1));
        byte[] t1w = toBytes(t1);
        assertEquals(0, compare.compare(t1w, 0, t1w.length, t1w, 0, t1w.length));
        assertEquals(0, compare2.compare(t1w, 0, t1w.length, t1w, 0, t1w.length));

        // new String(t) guarantees a distinct String instance with the same content
        SplitEmittedTerm t1a = new SplitEmittedTerm(new String(t), split, flush);
        assertEquals(0, t1.compareTo(t1a));
        assertEquals(0, t1a.compareTo(t1));
        assertTrue(t1.equals(t1a));
        assertTrue(t1a.equals(t1));
        assertEquals(0, compare.compare(t1, t1a));
        assertEquals(0, compare.compare(t1a, t1));
        assertEquals(0, compare2.compare(t1, t1a));
        assertEquals(0, compare2.compare(t1a, t1));

        // the serialised forms of the two equal entries must compare equal as well
        byte[] t1aw = toBytes(t1a);
        assertEquals(0, compare.compare(t1w, 0, t1w.length, t1aw, 0, t1aw.length));
        assertEquals(0, compare.compare(t1aw, 0, t1aw.length, t1w, 0, t1w.length));
        assertEquals(0, compare2.compare(t1w, 0, t1w.length, t1aw, 0, t1aw.length));
        assertEquals(0, compare2.compare(t1aw, 0, t1aw.length, t1w, 0, t1w.length));
    }

    /** Equality of identical entries, for small and for extreme split/flush numbers. */
    public void testEqualityTerm() throws Exception
    {
        checkEqualityTerm("t1", 0, 0);
        checkEqualityTerm("t1", Integer.MAX_VALUE, Integer.MAX_VALUE);
    }

    /**
     * Checks two entries sharing a term and a flush number but having different
     * split numbers: the term-only comparator must see them as equal, everything
     * else must order the entry with split1 first.
     * @param t the term shared by both entries
     * @param split1 split number of the first entry; callers pass split1 &lt; split2
     * @param split2 split number of the second entry
     * @param flush the flush number shared by both entries
     */
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(
        value="DM_STRING_CTOR",
        justification="Check String.equals is used, not ==")
    private void checkEqualityTermSplit(String t, int split1, int split2, int flush) throws Exception
    {
        SplitEmittedTerm t1 = new SplitEmittedTerm(t, split1, flush);
        SplitEmittedTerm t2 = new SplitEmittedTerm(new String(t), split2, flush);
        SETRawComparatorTerm compare = new SETRawComparatorTerm();
        SETRawComparatorTermSplitFlush compare2 = new SETRawComparatorTermSplitFlush();

        assertEquals(0, t1.compareTo(t1));
        assertTrue("t1 should sort before t2", t1.compareTo(t2) < 0);
        assertTrue("t2 should sort after t1", t2.compareTo(t1) > 0);
        assertFalse(t1.equals(t2));
        assertFalse(t2.equals(t1));

        // term-only comparator ignores the split number
        assertEquals(0, compare.compare(t1, t2));
        assertEquals(0, compare.compare(t2, t1));
        // full comparator does not
        assertTrue("Comparing t1 to t2", compare2.compare(t1, t2) < 0);
        assertTrue("Comparing t2 to t1", compare2.compare(t2, t1) > 0);

        byte[] t1w = toBytes(t1);
        byte[] t2w = toBytes(t2);
        assertEquals(0, compare.compare(t1w, 0, t1w.length, t2w, 0, t2w.length));
        assertEquals(0, compare.compare(t2w, 0, t2w.length, t1w, 0, t1w.length));
        assertTrue("Comparing t1 to t2 as bytes", compare2.compare(t1w, 0, t1w.length, t2w, 0, t2w.length)< 0);
        assertTrue("Comparing t2 to t1 as bytes", compare2.compare(t2w, 0, t2w.length, t1w, 0, t1w.length) > 0);
    }

    /** Same term, different split, for small and for extreme split/flush numbers. */
    public void testEqualityTermSplit() throws Exception
    {
        checkEqualityTermSplit("t1", 0, 1, 0);
        checkEqualityTermSplit("t1", Integer.MAX_VALUE -1, Integer.MAX_VALUE, Integer.MAX_VALUE);
    }

    /**
     * Checks that t1 sorts strictly before t2 under equals(), compareTo() and
     * both raw comparators, on objects and on bytes, in both directions.
     * @param t1 the entry expected to sort first (callers pass a smaller term)
     * @param t2 the entry expected to sort second
     */
    private void compareTerm(SplitEmittedTerm t1, SplitEmittedTerm t2) throws Exception
    {
        SETRawComparatorTerm compare = new SETRawComparatorTerm();
        SETRawComparatorTermSplitFlush compare2 = new SETRawComparatorTermSplitFlush();

        //check for inequality of each pair
        assertFalse(t1.equals(t2));
        assertFalse(t2.equals(t1));
        assertTrue("t1 should sort before t2", t1.compareTo(t2) < 0);
        assertTrue("t2 should sort after t1", t2.compareTo(t1) > 0);

        assertTrue("Term comparator, t1 to t2", compare.compare(t1, t2) < 0);
        assertTrue("Term comparator, t2 to t1", compare.compare(t2, t1) > 0);
        assertTrue("Full comparator, t1 to t2", compare2.compare(t1, t2) < 0);
        assertTrue("Full comparator, t2 to t1", compare2.compare(t2, t1) > 0);

        byte[] t1w = toBytes(t1);
        byte[] t2w = toBytes(t2);
        assertTrue("Term comparator, t1 to t2 as bytes", compare.compare(t1w, 0, t1w.length, t2w, 0, t2w.length)< 0);
        assertTrue("Term comparator, t2 to t1 as bytes", compare.compare(t2w, 0, t2w.length, t1w, 0, t1w.length) > 0);
        assertTrue("Comparing t1 to t2 as bytes", compare2.compare(t1w, 0, t1w.length, t2w, 0, t2w.length) < 0);
        assertTrue("Comparing t2 to t1 as bytes", compare2.compare(t2w, 0, t2w.length, t1w, 0, t1w.length) > 0);
    }

    /** Entries differing only in their term, for small and for extreme split/flush numbers. */
    public void testCompareTerm() throws Exception
    {
        SplitEmittedTerm t1 = new SplitEmittedTerm("t1", 0, 0);
        SplitEmittedTerm t2 = new SplitEmittedTerm("t2", 0, 0);
        compareTerm(t1, t2);
        t1 = new SplitEmittedTerm("t1", Integer.MAX_VALUE, Integer.MAX_VALUE);
        t2 = new SplitEmittedTerm("t2", Integer.MAX_VALUE, Integer.MAX_VALUE);
        compareTerm(t1, t2);
    }

    /**
     * Checks two entries that share a term but where t1 has the smaller split
     * number: the term-only comparator must return 0, everything else must
     * order t1 first.
     * @param t1 the entry expected to sort first
     * @param t2 the entry expected to sort second
     */
    private void compareTermSplit(SplitEmittedTerm t1, SplitEmittedTerm t2) throws Exception
    {
        SETRawComparatorTerm compare = new SETRawComparatorTerm();
        SETRawComparatorTermSplitFlush compare2 = new SETRawComparatorTermSplitFlush();

        //check for inequality of each pair
        assertFalse(t1.equals(t2));
        assertFalse(t2.equals(t1));
        assertTrue("t1 should sort before t2", t1.compareTo(t2) < 0);
        assertTrue("t2 should sort after t1", t2.compareTo(t1) > 0);

        assertEquals(0, compare.compare(t1, t2));
        assertEquals(0, compare.compare(t2, t1));
        assertTrue("Full comparator, t1 to t2", compare2.compare(t1, t2) < 0);
        assertTrue("Full comparator, t2 to t1", compare2.compare(t2, t1) > 0);

        byte[] t1w = toBytes(t1);
        byte[] t2w = toBytes(t2);
        assertEquals(0, compare.compare(t1w, 0, t1w.length, t2w, 0, t2w.length));
        assertEquals(0, compare.compare(t2w, 0, t2w.length, t1w, 0, t1w.length));
        assertTrue("Full comparator, t1 to t2 as bytes", compare2.compare(t1w, 0, t1w.length, t2w, 0, t2w.length) < 0);
        assertTrue("Full comparator, t2 to t1 as bytes", compare2.compare(t2w, 0, t2w.length, t1w, 0, t1w.length) > 0);
    }

    /** Entries differing only in their split number, including the largest split numbers. */
    public void testCompareTermSplit() throws Exception
    {
        SplitEmittedTerm t1 = new SplitEmittedTerm("t1", 0, 0);
        SplitEmittedTerm t2 = new SplitEmittedTerm("t1", 1, 0);
        compareTermSplit(t1, t2);
        t1 = new SplitEmittedTerm("t1", Integer.MAX_VALUE-1, 0);
        t2 = new SplitEmittedTerm("t1", Integer.MAX_VALUE, 0);
        compareTermSplit(t1, t2);
    }

    /**
     * Reduces a comparison result to its sign, so that results of different
     * comparators can be compared with each other.
     * @param a any integer
     * @return -1 if a is negative, 1 if a is positive, 0 otherwise
     */
    static final int sign(int a)
    {
        return Integer.signum(a);
    }

    /**
     * Checks that t1 sorts strictly before t2 under equals(), compareTo() and
     * the full comparator, while the term-only comparator must agree in sign
     * with String.compareTo on the two terms (so it yields 0 when only the
     * flush numbers differ).
     * @param t1 the entry expected to sort first
     * @param t2 the entry expected to sort second
     */
    private void compareTermFlush(SplitEmittedTerm t1, SplitEmittedTerm t2) throws Exception
    {
        SETRawComparatorTerm compare = new SETRawComparatorTerm();
        SETRawComparatorTermSplitFlush compare2 = new SETRawComparatorTermSplitFlush();
        // what the term-only comparator should report, in each direction
        final int expectedForward = sign(t1.getTerm().compareTo(t2.getTerm()));
        final int expectedBackward = sign(t2.getTerm().compareTo(t1.getTerm()));

        //check for inequality of each pair
        assertFalse(t1.equals(t2));
        assertFalse(t2.equals(t1));
        assertTrue("t1 should sort before t2", t1.compareTo(t2) < 0);
        assertTrue("t2 should sort after t1", t2.compareTo(t1) > 0);

        assertEquals(expectedForward, sign(compare.compare(t1, t2)));
        assertEquals(expectedBackward, sign(compare.compare(t2, t1)));
        assertTrue("Full comparator, t1 to t2", compare2.compare(t1, t2) < 0);
        assertTrue("Full comparator, t2 to t1", compare2.compare(t2, t1) > 0);

        byte[] t1w = toBytes(t1);
        byte[] t2w = toBytes(t2);
        assertEquals(expectedForward, sign(compare.compare(t1w, 0, t1w.length, t2w, 0, t2w.length)));
        assertEquals(expectedBackward, sign(compare.compare(t2w, 0, t2w.length, t1w, 0, t1w.length)));
        assertTrue("Full comparator, t1 to t2 as bytes", compare2.compare(t1w, 0, t1w.length, t2w, 0, t2w.length) < 0);
        assertTrue("Full comparator, t2 to t1 as bytes", compare2.compare(t2w, 0, t2w.length, t1w, 0, t1w.length) > 0);
    }

    /**
     * Entries differing only in their flush number, plus pairs of
     * single-character terms involving punctuation and a digit.
     */
    public void testCompareTermFlush() throws Exception
    {
        SplitEmittedTerm t1,t2;
        t1 = new SplitEmittedTerm("t1", 0, 0);
        t2 = new SplitEmittedTerm("t1", 0, 1);
        compareTermFlush(t1, t2);
        t1 = new SplitEmittedTerm(".", 0, 0);
        t2 = new SplitEmittedTerm("0", 0, 0);
        compareTermFlush(t1, t2);
        t1 = new SplitEmittedTerm("0", 0, 0);
        t2 = new SplitEmittedTerm("\\", 0, 0);
        compareTermFlush(t1, t2);
        t1 = new SplitEmittedTerm("t1", 0, Integer.MAX_VALUE -1);
        t2 = new SplitEmittedTerm("t1", 0, Integer.MAX_VALUE );
        compareTermFlush(t1, t2);
    }

    /* Test cases for SETPartitionerLowercaseAlphaTerm */

    /**
     * Asserts that every character of initials is assigned to the expected
     * partition.
     * @param p the partitioner under test
     * @param expected the partition every character should be assigned to
     * @param numPartitions the number of partitions available
     * @param initials the characters to check, in order
     */
    private static void assertPartitions(SETPartitionerLowercaseAlphaTerm p, int expected, int numPartitions, String initials)
    {
        for (int i = 0; i < initials.length(); i++)
        {
            final char c = initials.charAt(i);
            assertEquals("Partition of '" + c + "' among " + numPartitions + " partition(s)",
                expected, p.calculatePartition(c, numPartitions));
        }
    }

    /**
     * Checks the partition chosen for various initial characters with 1, 2, 3
     * and 26 partitions.
     */
    public void testSETPLAT() throws Exception
    {
        final SETPartitionerLowercaseAlphaTerm p = new SETPartitionerLowercaseAlphaTerm();

        //single partition
        assertPartitions(p, 0, 1, "09-az}");

        //two partitions
        assertPartitions(p, 0, 2, "(.)\\/09-alm");
        assertPartitions(p, 1, 2, "noz}");
        //(all upper case characters go to partition 0)
        assertPartitions(p, 0, 2, "MNO");

        //three partitions
        assertPartitions(p, 0, 3, "09-ahi");
        assertPartitions(p, 1, 3, "jr");
        assertPartitions(p, 2, 3, "stuz}");

        //26 partitions: digits and '-' join 'a', then one letter per partition
        assertPartitions(p, 0, 26, "09-");
        for (char c = 'a'; c <= 'z'; c++)
        {
            assertEquals("Partition of '" + c + "' among 26 partition(s)",
                c - 'a', p.calculatePartition(c, 26));
        }
    }

    /* Test cases for SETPartitioner */

    /** single map, single reducer */
    public void testSMSRCalculatePartition() throws Exception
    {
        final JobConf j = new JobConf();
        j.setNumMapTasks(1);
        final SETPartitioner p = new SETPartitioner();
        p.configure(j);
        assertEquals(0, p.calculatePartition(0, 1));
    }

    /** multiple map, single reducer */
    public void testMMSRCalculatePartition() throws Exception
    {
        final JobConf j = new JobConf();
        final int maptasks = 20;
        j.setNumMapTasks(maptasks);
        final SETPartitioner p = new SETPartitioner();
        p.configure(j);
        // first, last and middle split all end up in the only partition
        assertEquals(0, p.calculatePartition(0, 1));
        assertEquals(0, p.calculatePartition(maptasks - 1, 1));
        assertEquals(0, p.calculatePartition(maptasks / 2, 1));
    }

    /** multiple map, multiple reducer */
    public void testMMMRCalculatePartition() throws Exception
    {
        final JobConf j = new JobConf();
        final int maptasks = 20;
        j.setNumMapTasks(maptasks);
        final SETPartitioner p = new SETPartitioner();
        p.configure(j);
        // the first half of the splits is expected in partition 0 ...
        assertEquals(0, p.calculatePartition(0, 2));
        assertEquals(0, p.calculatePartition(1, 2));
        assertEquals(0, p.calculatePartition(maptasks / 2 - 1, 2));
        // ... and the second half in partition 1
        assertEquals(1, p.calculatePartition(maptasks / 2, 2));
        assertEquals(1, p.calculatePartition(maptasks - 1, 2));
    }
}